pacman::p_load(tidyverse,xtable,gridExtra,openxlsx)

# Directory that holds the input files. Leave it empty to read from (and write
# to) the current working directory: calling setwd("") raises "cannot change
# working directory", so the directory is only switched when a path is given.
data.dir <- "" # set working directory
if (nzchar(data.dir)) {
  setwd(data.dir)
}

# Raw survey files used by the figures and tables below.
nh2020 <- read.csv("nh2020-exitpoll.csv")
yg2023 <- read.csv("yougov23.csv")
pa2020 <- read.csv("pa2020-panel.csv")

# figure A6 -----
# Cross-tabulate first (Q1) and second (Q2) choices in the NH 2020 exit poll.

# Answers removed from both questions: abstentions, blanks, "Someone else",
# marginal candidates and invalid multi-candidate answers.
nh2020.drop.both <- c(
  "I did not vote in this race",
  "",
  "Someone else",
  "Amy Klobuchar,Pete Buttigieg", # invalid
  "Andrew Yang,Bernie Sanders",   # invalid
  "Andrew Young",                 # clearing marginal cands (spelling as in data)
  "Andrew Yang",                  # clearing marginal cands
  "Tom Steyer",                   # clearing marginal cands
  "Deval Patrick"                 # clearing marginal cands
)
# One further invalid answer is screened out of the second choice only.
nh2020.drop.q2 <- c(nh2020.drop.both, "Bernie Sanders,Pete Buttigieg")

nh2020.short <- nh2020 %>%
  # The earlier `!=` filters also discarded NA rows; %in% does not, so the NA
  # exclusion is spelled out to keep the same set of respondents.
  filter(
    !is.na(Q1), !is.na(Q2),
    !Q1 %in% nh2020.drop.both,
    !Q2 %in% nh2020.drop.q2
  ) %>%
  # Repair the misspelled Bloomberg label in both questions.
  mutate(
    Q1 = if_else(Q1 == "Michael Bloomberh", "Michael Bloomberg", Q1),
    Q2 = if_else(Q2 == "Michael Bloomberh", "Michael Bloomberg", Q2)
  ) %>%
  group_by(Q1, Q2) %>%
  dplyr::summarise(count = n(), .groups = "drop") %>%
  # One row per first choice, one column per second choice.
  pivot_wider(names_from = Q2, values_from = count, values_fill = 0)

# Every column except the Q1 label holds counts. Selecting them by exclusion
# (rather than the fixed range 2:7) keeps the tables in step with the data.
nh2020.counts <- nh2020.short[, -1]
sum(nh2020.counts) # 432 obs

# top: each cell as a percentage of all respondents
# bottom: each cell as a percentage of its row (i.e. of that first choice)
nh2020.short.top <- cbind(nh2020.short$Q1, 100 * (nh2020.counts / sum(rowSums(nh2020.counts))))
nh2020.short.bottom <- cbind(nh2020.short$Q1, 100 * (nh2020.counts / rowSums(nh2020.counts)))

# Long format: one row per (first choice, second choice) pair. The columns are
# selected by the first-choice names, so both axes must share the same
# candidates; all_of() errors if a name is missing.
nh2020.short.top.tp <- nh2020.short.top %>%
  pivot_longer(all_of(nh2020.short$Q1))
nh2020.short.bottom.tp <- nh2020.short.bottom %>%
  pivot_longer(all_of(nh2020.short$Q1))

colnames(nh2020.short.top.tp) <- c("first", "second", "count")
colnames(nh2020.short.bottom.tp) <- c("first", "second", "count")

# Collapse the share-of-all-respondents table into two camps per choice:
# 1 = Sanders/Warren ("left"), 0 = any other candidate.
nh2020.left <- c("Bernie Sanders", "Elizabeth Warren")

matrix.nh2020 <- nh2020.short.top.tp %>%
  mutate(
    first.left = as.numeric(first %in% nh2020.left),
    second.left = as.numeric(second %in% nh2020.left)
  ) %>%
  group_by(first.left, second.left) %>%
  dplyr::summarize(count = sum(count, na.rm = TRUE)) %>%
  ungroup() %>%
  mutate(fraction = count / sum(count)) %>%
  na.omit()

# Rows come out sorted by (first.left, second.left): (0,0), (0,1), (1,0), (1,1)
# -- this positional indexing assumes all four combinations are present.
matrix.nh2020[["fraction"]][1] # 41% moderate camp
matrix.nh2020[["fraction"]][4] # 26% left camp
sum(matrix.nh2020[["fraction"]][2:3]) # 33% switch

# Same camp coding on the raw counts, for the chi-square test.
chi.nh2020 <- nh2020.short %>%
  pivot_longer(all_of(nh2020.short$Q1)) %>%
  mutate(
    first.left = as.numeric(Q1 %in% nh2020.left),
    second.left = as.numeric(name %in% nh2020.left)
  ) %>%
  group_by(first.left, second.left) %>%
  dplyr::summarize(count = sum(value, na.rm = TRUE))

# Test of independence between first-choice and second-choice camp.
chisq.test(matrix(chi.nh2020$count, ncol = 2)) # 42.95 X^2

# Two-decimal percentages for the tile labels.
nh2020.short.top.tp <- nh2020.short.top.tp %>%
  mutate(count = round(count, 2))
nh2020.short.bottom.tp <- nh2020.short.bottom.tp %>%
  mutate(count = round(count, 2))

# Heatmap of second choice (x) by first choice (y); tiles are shaded and
# labelled with `count`. Both panels of the figure share this layout and
# differ only in their data and title.
nh2020.heatmap <- function(data, title) {
  ggplot(data, aes(x = second, y = first, fill = count)) +
    geom_tile() +
    geom_text(aes(label = count), color = "white", size = 5) +
    scale_fill_gradient(low = "lightblue", high = "darkblue") +
    labs(title = title) +
    theme_minimal() +
    theme(
      plot.title = element_text(size = 20, face = "bold", hjust = 0.5),
      axis.text.x = element_text(angle = 45, hjust = 1),
      axis.text.y = element_text(angle = 45, hjust = 1),
      axis.title = element_blank(),
      axis.text = element_blank(),
      axis.ticks = element_blank(),
      panel.grid = element_blank(),
      legend.position = "none"
    )
}

nh2020.short.top.tp.p <- nh2020.heatmap(
  nh2020.short.top.tp,
  "First and Second Vote Choices, Democratic Primary\nNH Exit Poll, 2020 (% All Respondents)"
)

nh2020.short.bottom.tp.p <- nh2020.heatmap(
  nh2020.short.bottom.tp,
  "First and Second Vote Choices, Democratic Primary\nNH Exit Poll, 2020"
)

# Stack the two panels in a single PDF.
pdf("figureA6.pdf", width = 8, height = 12)
grid.arrange(nh2020.short.top.tp.p, nh2020.short.bottom.tp.p, ncol = 1)
dev.off()

# figure A7 ------
# Cross-tabulate first and second choices in the PA 2020 panel.

# Answers removed from both questions: non-Democrats, blanks, "Someone else"
# and marginal candidates.
pa2020.drop <- c(
  "I am not a Democrat",
  "",
  "Someone else",
  "Tom Steyer",    # clearing marginal cands
  "Tulsi Gabbard"  # clearing marginal cands
)

pa2020.short <- pa2020 %>%
  # The earlier `!=` filters also discarded NA rows; %in% does not, so the NA
  # exclusion is spelled out to keep the same set of respondents.
  filter(
    !is.na(upenn_vote_president_primary_2020_dem_first),
    !is.na(upenn_vote_president_primary_2020_dem_second),
    !upenn_vote_president_primary_2020_dem_first %in% pa2020.drop,
    !upenn_vote_president_primary_2020_dem_second %in% pa2020.drop
  ) %>%
  group_by(upenn_vote_president_primary_2020_dem_first, upenn_vote_president_primary_2020_dem_second) %>%
  dplyr::summarise(count = n(), .groups = "drop") %>%
  # One row per first choice, one column per second choice.
  pivot_wider(names_from = upenn_vote_president_primary_2020_dem_second, values_from = count, values_fill = 0)

# Every column except the first-choice label holds counts. Selecting them by
# exclusion (rather than the fixed range 2:7) keeps the tables in step with
# the data.
pa2020.counts <- pa2020.short[, -1]
sum(pa2020.counts) # 1189 obs

# top: each cell as a percentage of all respondents
# bottom: each cell as a percentage of its row (i.e. of that first choice)
pa2020.short.top <- cbind(pa2020.short$upenn_vote_president_primary_2020_dem_first, 100 * (pa2020.counts / sum(rowSums(pa2020.counts))))
pa2020.short.bottom <- cbind(pa2020.short$upenn_vote_president_primary_2020_dem_first, 100 * (pa2020.counts / rowSums(pa2020.counts)))
pa2020.short.top # figure A7
pa2020.short.bottom # figure A7

# Long format: one row per (first choice, second choice) pair. The columns are
# selected by the first-choice names, so both axes must share the same
# candidates; all_of() errors if a name is missing.
pa2020.short.top.tp <- pa2020.short.top %>%
  pivot_longer(all_of(pa2020.short$upenn_vote_president_primary_2020_dem_first))
pa2020.short.bottom.tp <- pa2020.short.bottom %>%
  pivot_longer(all_of(pa2020.short$upenn_vote_president_primary_2020_dem_first))

colnames(pa2020.short.top.tp) <- c("first", "second", "count")
colnames(pa2020.short.bottom.tp) <- c("first", "second", "count")

# Collapse the share-of-all-respondents table into two camps per choice:
# 1 = Sanders/Warren ("left"), 0 = any other candidate.
pa2020.left <- c("Bernie Sanders", "Elizabeth Warren")

matrix.pa2020 <- pa2020.short.top.tp %>%
  mutate(
    first.left = as.numeric(first %in% pa2020.left),
    second.left = as.numeric(second %in% pa2020.left)
  ) %>%
  group_by(first.left, second.left) %>%
  dplyr::summarize(count = sum(count, na.rm = TRUE)) %>%
  ungroup() %>%
  mutate(fraction = count / sum(count)) %>%
  na.omit()

# Rows come out sorted by (first.left, second.left): (0,0), (0,1), (1,0), (1,1)
# -- this positional indexing assumes all four combinations are present.
matrix.pa2020[["fraction"]][1] # 36% moderate camp
matrix.pa2020[["fraction"]][4] # 29% left camp
sum(matrix.pa2020[["fraction"]][2:3]) # 35% switch

# Same camp coding on the raw counts, for the chi-square test.
chi.pa2020 <- pa2020.short %>%
  pivot_longer(all_of(pa2020.short$upenn_vote_president_primary_2020_dem_first)) %>%
  mutate(
    first.left = as.numeric(upenn_vote_president_primary_2020_dem_first %in% pa2020.left),
    second.left = as.numeric(name %in% pa2020.left)
  ) %>%
  group_by(first.left, second.left) %>%
  dplyr::summarize(count = sum(value, na.rm = TRUE))

# Test of independence between first-choice and second-choice camp.
chisq.test(matrix(chi.pa2020$count, ncol = 2)) # 98.757 X^2

# Two-decimal percentages for the tile labels.
pa2020.short.top.tp <- pa2020.short.top.tp %>%
  mutate(count = round(count, 2))
pa2020.short.bottom.tp <- pa2020.short.bottom.tp %>%
  mutate(count = round(count, 2))

# Heatmap of second choice (x) by first choice (y); tiles are shaded and
# labelled with `count`. Both panels of the figure share this layout and
# differ only in their data and title.
pa2020.heatmap <- function(data, title) {
  ggplot(data, aes(x = second, y = first, fill = count)) +
    geom_tile() +
    geom_text(aes(label = count), color = "white", size = 5) +
    scale_fill_gradient(low = "lightblue", high = "darkblue") +
    labs(title = title) +
    theme_minimal() +
    theme(
      plot.title = element_text(size = 20, face = "bold", hjust = 0.5),
      axis.text.x = element_text(angle = 45, hjust = 1),
      axis.text.y = element_text(angle = 45, hjust = 1),
      axis.title = element_blank(),
      axis.text = element_blank(),
      axis.ticks = element_blank(),
      panel.grid = element_blank(),
      legend.position = "none"
    )
}

pa2020.short.top.tp.p <- pa2020.heatmap(
  pa2020.short.top.tp,
  "First and Second Vote Choices, Democratic Primary\nPA Panel, 2020 (% All Respondents)"
)

pa2020.short.bottom.tp.p <- pa2020.heatmap(
  pa2020.short.bottom.tp,
  "First and Second Vote Choices, Democratic Primary\nPA Panel, 2020"
)

# Stack the two panels in a single PDF.
pdf("figureA7.pdf", width = 8, height = 12)
grid.arrange(pa2020.short.top.tp.p, pa2020.short.bottom.tp.p, ncol = 1)
dev.off()

# figure A8 -----
# Cross-tabulate the two choice questions (S4_1, S4_3) in the 2023 YouGov data.

# Answers removed from both questions.
yg2023.drop <- c("I wouldn’t vote in the Republican primary", "Unsure")

yg2023.short <- yg2023 %>%
  dplyr::select(S4_1, S4_3) %>%
  # The earlier `!=` filters also discarded NA rows; %in% does not, so the NA
  # exclusion is spelled out to keep the same set of respondents.
  filter(
    !is.na(S4_1), !is.na(S4_3),
    !S4_1 %in% yg2023.drop,
    !S4_3 %in% yg2023.drop
  ) %>%
  group_by(S4_1, S4_3) %>%
  dplyr::summarise(count = n(), .groups = "drop") %>%
  # One row per S4_1 answer, one column per S4_3 answer.
  pivot_wider(names_from = S4_3, values_from = count, values_fill = 0)

# Every column except the S4_1 label holds counts. Selecting them by exclusion
# (rather than the fixed range 2:8) keeps the tables in step with the data.
yg2023.counts <- yg2023.short[, -1]
sum(yg2023.counts) # 315 obs

# top: each cell as a percentage of all respondents
# bottom: each cell as a percentage of its row (i.e. of that first choice)
yg2023.short.top <- cbind(yg2023.short$S4_1, 100 * (yg2023.counts / sum(rowSums(yg2023.counts))))
yg2023.short.bottom <- cbind(yg2023.short$S4_1, 100 * (yg2023.counts / rowSums(yg2023.counts)))
yg2023.short.top # figure A8
yg2023.short.bottom # figure A8

# Long format: one row per (first choice, second choice) pair. The columns are
# selected by the first-choice names, so both axes must share the same
# candidates; all_of() errors if a name is missing.
yg2023.short.top.tp <- yg2023.short.top %>%
  pivot_longer(all_of(yg2023.short$S4_1))
yg2023.short.bottom.tp <- yg2023.short.bottom %>%
  pivot_longer(all_of(yg2023.short$S4_1))

colnames(yg2023.short.top.tp) <- c("first", "second", "count")
colnames(yg2023.short.bottom.tp) <- c("first", "second", "count")
# List the candidates that remain after filtering.
unique(yg2023.short.top.tp$first)
# Collapse the share-of-all-respondents table into two camps per choice:
# 1 = Trump/Ramaswamy/DeSantis ("right"), 0 = any other candidate.
yg2023.right <- c("Donald Trump", "Vivek Ramaswamy", "Ron DeSantis")

matrix.yg2023 <- yg2023.short.top.tp %>%
  mutate(
    first.right = as.numeric(first %in% yg2023.right),
    second.right = as.numeric(second %in% yg2023.right)
  ) %>%
  group_by(first.right, second.right) %>%
  dplyr::summarize(count = sum(count, na.rm = TRUE)) %>%
  ungroup() %>%
  mutate(fraction = count / sum(count)) %>%
  na.omit()

# Rows come out sorted by (first.right, second.right): (0,0), (0,1), (1,0),
# (1,1) -- this positional indexing assumes all four combinations are present.
matrix.yg2023[["fraction"]][1] # 6% moderate camp
matrix.yg2023[["fraction"]][4] # 65% right camp
sum(matrix.yg2023[["fraction"]][2:3]) # 29% switch

# Same camp coding on the raw counts, for the chi-square test.
chi.yg2023 <- yg2023.short %>%
  pivot_longer(all_of(yg2023.short$S4_1)) %>%
  mutate(
    first.right = as.numeric(S4_1 %in% yg2023.right),
    second.right = as.numeric(name %in% yg2023.right)
  ) %>%
  group_by(first.right, second.right) %>%
  dplyr::summarize(count = sum(value, na.rm = TRUE))

# Test of independence between first-choice and second-choice camp.
chisq.test(matrix(chi.yg2023$count, ncol = 2)) # 9.14 X^2

# Two-decimal percentages for the tile labels.
yg2023.short.top.tp <- yg2023.short.top.tp %>%
  mutate(count = round(count, 2))
yg2023.short.bottom.tp <- yg2023.short.bottom.tp %>%
  mutate(count = round(count, 2))

# Heatmap of second choice (x) by first choice (y); tiles are shaded and
# labelled with `count`. Both panels of the figure share this layout and
# differ only in their data and title.
yg2023.heatmap <- function(data, title) {
  ggplot(data, aes(x = second, y = first, fill = count)) +
    geom_tile() +
    geom_text(aes(label = count), color = "white", size = 5) +
    scale_fill_gradient(low = "#FF6666", high = "darkred") +
    labs(title = title) +
    theme_minimal() +
    theme(
      plot.title = element_text(size = 20, face = "bold", hjust = 0.5),
      axis.text.x = element_text(angle = 45, hjust = 1),
      axis.text.y = element_text(angle = 45, hjust = 1),
      axis.title = element_blank(),
      axis.text = element_blank(),
      axis.ticks = element_blank(),
      panel.grid = element_blank(),
      legend.position = "none"
    )
}

yg2023.short.top.tp.p <- yg2023.heatmap(
  yg2023.short.top.tp,
  "First and Second Vote Choices, Republican Primary\nYouGov, 2023 (% All Respondents)"
)

yg2023.short.bottom.tp.p <- yg2023.heatmap(
  yg2023.short.bottom.tp,
  "First and Second Vote Choices, Republican Primary\nYouGov, 2023"
)

# Stack the two panels in a single PDF.
pdf("figureA8.pdf", width = 8, height = 12)
grid.arrange(yg2023.short.top.tp.p, yg2023.short.bottom.tp.p, ncol = 1)
dev.off()


# table A12 -----
# Descriptive statistics of the NH 2020 exit poll, built from Q13-Q16.

nh2020.desc <- nh2020 %>%
  dplyr::select(Q13, Q14, Q15, Q16)

# Q16 mixes two kinds of numbers: values above 100 are converted with
# 2020 - value (presumably birth years turned into ages -- confirm against the
# questionnaire), everything else is kept as entered. Despite its name, `yob`
# therefore no longer holds raw years afterwards. Vectorised so that an empty
# or all-NA column is handled without error.
yob <- as.numeric(nh2020.desc$Q16)
is.birth.year <- !is.na(yob) & yob > 100
yob[is.birth.year] <- 2020 - yob[is.birth.year]

# Tabulate one question: one row per distinct answer with its count and its
# share of all rows (as a fraction), tagged with a category name.
#   category: text for the Category column
#   question: name of the column in nh2020.desc to tabulate
nh2020.tabulate <- function(category, question) {
  tab <- cbind(category, nh2020.desc %>% count(.data[[question]]))
  tab$percentage <- tab$n / sum(tab$n)
  colnames(tab) <- c("Category", "Label", "Count", "Percentage")
  tab
}

gender <- nh2020.tabulate("Gender", "Q13")
party_id <- nh2020.tabulate("Party ID", "Q14")
educ <- nh2020.tabulate("Education", "Q15")

descstat.nh <- rbind(gender, party_id, educ)
descstat.nh$Percentage <- round(descstat.nh$Percentage * 100, 2)
descstat.nh$Label[descstat.nh$Label == ""] <- "No Response"
descstat.nh$Label[descstat.nh$Label == "Other:"] <- "Other"
# The invalid answer is removed only after the shares were computed, so it
# still counts towards the denominator of its category.
descstat.nh <- descstat.nh %>%
  filter(!Label %in% c("Democrat,Independent")) # remove invalid response

# add age average
# NOTE: rbind() with a character vector turns Count and Percentage into
# character columns; kept as-is so the exported table is unchanged.
descstat.nh <- rbind(
  descstat.nh,
  c("Age", "Age (Average)", sum(!is.na(yob)), round(mean(yob, na.rm = TRUE), 2))
)

print(
  xtable(
    descstat.nh,
    caption = c("Descriptive statistics of the NH 2020 exit poll."),
    label = "t:nh:descstats"
  ),
  include.rownames = FALSE,
  file = "tableA12.tex"
) # table A12

# table A13 -----
# Compare first-choice shares in the exit poll with the official NH results.

# Official results. Var.1 and X12 are the column names after data.frame()'s
# name checking -- presumably the candidate label and the vote total; verify
# against the workbook. The first data row is skipped and incomplete rows are
# dropped before shares are computed; only candidates above 2% are kept.
actual.nh <- data.frame(read.xlsx("2020_nh_demprimaryresults.xlsx")) %>%
  dplyr::select(Var.1, X12) %>%
  slice(-1) %>%
  na.omit() %>%
  mutate(percentage = round(100 * (as.numeric(X12) / sum(as.numeric(X12))), 2)) %>%
  arrange(desc(percentage)) %>%
  filter(percentage > 2)

# Exit-poll first choices, excluding blanks and "Someone else"; same 2% cut.
observed.nh <- nh2020 %>%
  count(Q1) %>%
  filter(Q1 != "", Q1 != "Someone else") %>%
  mutate(percentage = round(100 * (as.numeric(n) / sum(as.numeric(n))), 2)) %>%
  arrange(desc(percentage)) %>%
  filter(percentage > 2)

# Merge and clean
colnames(observed.nh) <- c("Candidates", "Total Votes", "Percentage")
colnames(actual.nh) <- c("Candidates", "Total Votes (Actual)", "Percentage (Actual)")

# Map the labels used in the results file onto the names used in the poll so
# that the join below can match them.
nh.ballot.names <- c(
  "Sanders, d" = "Bernie Sanders",
  "Buttigieg, d" = "Pete Buttigieg",
  "Klobuchar, d" = "Amy Klobuchar",
  "Warren, d" = "Elizabeth Warren",
  "Biden, d" = "Joe Biden",
  "Steyer, d" = "Tom Steyer",
  "Gabbard, d" = "Tulsi Gabbard",
  "Yang, d" = "Andrew Yang"
)
has.mapping <- actual.nh$Candidates %in% names(nh.ballot.names)
actual.nh$Candidates[has.mapping] <-
  unname(nh.ballot.names[actual.nh$Candidates[has.mapping]])

# Repair the misspelled Bloomberg label in the poll data.
observed.nh$Candidates[observed.nh$Candidates == "Michael Bloomberh"] <- "Michael Bloomberg"

# Keep candidates that appear in either source.
merged <- observed.nh %>%
  full_join(actual.nh, by = "Candidates")

print(
  xtable(
    merged,
    caption = c("A comparison of the NH 2020 exit poll primary election results with the actual primary election results. While Bloomberg was an option in our exit poll, he was not on the ballot in the NH primary."),
    label = "t:nh:benchmarks",
    digits = 2
  ),
  include.rownames = FALSE,
  file = "tableA13.tex"
) # table A13